# Replication file for "Going the last Mile"

# Code necessary to replicate all tables and figures based on CRVS data
# Table 2, Figure B.2, Table B.5, Figure 4, Table C.5, Figure 6

# create directory "replication files/going the last mile" inside the working
# directory; the input data are read from it and all output is saved to it
# R version 4.2.1

# install did package (https://bcallaway11.github.io/did/)
# install.packages("did")
# install.packages("tidyverse")


library(readr)
library(readxl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(did)

# MEXICO ------------------------------------------------------------------
# Monthly Veracruz/Hidalgo data (used for Table 2 and Figure B.2)
veracruz_hidalgo_monthly <- read_csv(
  file = "replication files/going the last mile/veracruz_hidalgo_monthly.csv"
)
# Annual Mexico data (used for Table B.5 and Figure 4)
mexico_annual <- read_csv(
  file = "replication files/going the last mile/mexico_annual.csv"
)

# Hidalgo Veracruz Comparison
# Table 2: difference-in-differences regressions on the monthly data.
# `Veracruz_treatment` flags rows whose `entregis` is "Veracruz",
# `Pension_time` flags months from 2005-02-01 onwards, and `did` is their
# interaction, so the coefficient on `did` is the DiD estimate.
# The regressors are identical for both cohorts, so the data set is built
# once and reused; only the outcome column differs between the two models.
BR_Mexico_monthly_did <- veracruz_hidalgo_monthly |>
  mutate(
    # compare against a Date (as the Figure B.2 cutoff line does) instead of
    # relying on implicit character-to-Date coercion of a string literal
    Pension_time = ifelse(regdate >= as.Date("2005-02-01"), 1, 0),
    Veracruz_treatment = ifelse(entregis == "Veracruz", 1, 0),
    did = Pension_time * Veracruz_treatment
  )

# Table 2, Model 1
# 60-70
didreg_60_70 <- lm(
  n_60_70 ~ Veracruz_treatment + Pension_time + did,
  data = BR_Mexico_monthly_did
)
summary(didreg_60_70)

# Table 2, Model 2
# 70-80
didreg_70_80 <- lm(
  n_70_80 ~ Veracruz_treatment + Pension_time + did,
  data = BR_Mexico_monthly_did
)
summary(didreg_70_80)

# Show both models side by side in the console, then write the LaTeX table
texreg::screenreg(list(didreg_60_70, didreg_70_80))
texreg::texreg(
  list(didreg_60_70, didreg_70_80),
  file = "replication files/going the last mile/table2.tex"
)


# Figure B.2
# Line plot of the two cohort columns over `regdate`, one line type per
# `entregis` value and one panel per cohort, with a vertical marker at
# 2005-02-01.
facet_names <- c(n_60_70 = "Cohort: 60 - 70", n_70_80 = "Cohort: 70 - 80")

# stack the cohort columns: column name goes to `cohort`, count to `value`
registrations_long <- pivot_longer(
  veracruz_hidalgo_monthly,
  cols = n_60_70:n_70_80,
  names_to = "cohort",
  values_to = "value"
)

figure_b2 <- ggplot(
  registrations_long,
  aes(x = regdate, y = value, linetype = entregis)
) +
  geom_line() +
  geom_vline(xintercept = as.Date("2005-02-01")) +
  facet_wrap(vars(cohort), labeller = as_labeller(facet_names)) +
  labs(x = "Year-Month", y = "Registrations per Cohort", linetype = "State") +
  scale_x_date(
    date_breaks = "6 months",
    date_labels = "%Y-%m",
    guide = guide_axis(angle = 45)
  ) +
  jtools::theme_apa() +
  theme(axis.title.x = element_blank())
figure_b2

# pass the plot explicitly; it is the same object last_plot() would return
ggsave(
  filename = "replication files/going the last mile/figure b2.png",
  plot = figure_b2
)


# Mexico National differences-in-differences, Table B.5, Figure 4
# Build the long panel passed to att_gt(): one row per original row and
# cohort column, restricted to four cohorts.
treated_cohorts <- c("70_80", "80_100")
comparison_cohorts <- c("50_60", "60_70")

BR_Mexico_did <- mexico_annual |>
  pivot_longer(
    cols = starts_with("n_"),
    names_prefix = "n_",
    names_to = "cohort",
    values_to = "count"
  ) |>
  filter(cohort %in% c(treated_cohorts, comparison_cohorts)) |>
  mutate(
    # first treatment period is 2006 for the two older cohorts, 0 otherwise
    first.treat = if_else(cohort %in% treated_cohorts, 2006, 0),
    # numeric unit id: state index followed by the cohort index (the cohort
    # index is a single digit here, so the concatenation is unambiguous)
    region_cohort = as.numeric(paste0(
      as.integer(as.factor(entregis)),
      as.integer(as.factor(cohort))
    ))
  )

# Group-time average treatment effects for the Mexico panel.
#   yname   - outcome column (`count`)
#   tname   - time column (`year`)
#   idname  - unit identifier (`region_cohort`)
#   gname   - first treatment period (`first.treat`)
#   xformla - `~1`, i.e. no covariates
# Units that are not yet treated serve as the comparison group.
# `xformla` is passed by name: the former `xformla <- ~1` was an assignment
# expression that only reached this argument through positional matching
# and left a stray `xformla` object in the calling environment.
# The result is stored as `did` because the table and figure code below
# refers to it by that name.
did <- att_gt(
  yname = "count",
  tname = "year",
  idname = "region_cohort",
  gname = "first.treat",
  xformla = ~1,
  data = BR_Mexico_did,
  panel = TRUE,
  control_group = "notyettreated"
)
# Table B.5
# Console summary of the estimates stored in `did`.
# Note that coefficients are the same, but confidence intervals vary
summary(did)

# LaTeX export of the tidied estimates, without the `term` column and the
# point-wise confidence bounds
table_b5 <- select(tidy(did), -c(term, point.conf.low, point.conf.high))
kableExtra::save_kable(
  kableExtra::kable(table_b5, format = "latex"),
  file = "replication files/going the last mile/table b5.tex"
)

# Figure 4
# Default plot from the did package, shown for reference only
ggdid(did)

# Custom plot of the estimates with their confidence intervals.
# half-length of the horizontal caps drawn at the interval ends
width <- 0.1

# `post` is coded 1 for periods before 2006 and 0 from 2006 onwards; the
# linetype scale below labels "1" as Pre and "0" as Post accordingly
att_estimates_mexico <- mutate(tidy(did), post = as.numeric(time < 2006))

figure_4 <- ggplot(
  att_estimates_mexico,
  aes(x = time, y = estimate, linetype = factor(post))
) +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  # lower cap
  geom_segment(
    aes(x = time - width, xend = time + width, y = conf.low, yend = conf.low),
    linewidth = 0.5,
    linetype = "solid"
  ) +
  # upper cap
  geom_segment(
    aes(x = time - width, xend = time + width, y = conf.high, yend = conf.high),
    linewidth = 0.5,
    linetype = "solid"
  ) +
  # interval itself; its line type distinguishes pre from post periods
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), linewidth = 0.5) +
  scale_x_continuous(breaks = 2004:2008) +
  ylab("Average Treatment Effect") +
  scale_linetype_discrete(
    breaks = c("1", "0"),
    labels = stringr::str_wrap(c("Pre", "Post"), width = 20)
  ) +
  jtools::theme_apa() +
  theme(axis.title.x = element_blank())
figure_4

# save the custom plot explicitly rather than whatever was plotted last
ggsave(
  filename = "replication files/going the last mile/figure 4.png",
  plot = figure_4
)


# BOLIVIA -----------------------------------------------------------------
# Table C.5
# Figure 6

# Load the Bolivia panel and add the two columns att_gt() needs
BR_Bolivia_92_01 <- read_csv(
  file = "replication files/going the last mile/br_bolivia_92_01.csv"
) |>
  mutate(
    # first treatment period is 1997 for the "65-120" cohort, 0 otherwise
    first.treat = if_else(cohort == "65-120", 1997, 0),
    # numeric unit id: department index followed by the cohort index
    region_cohort = as.numeric(paste0(
      as.numeric(as.factor(department)),
      as.numeric(as.factor(cohort))
    ))
  )

# Group-time average treatment effects for the Bolivia panel.
#   yname   - outcome column (`count`)
#   tname   - time column (`year`)
#   idname  - unit identifier (`region_cohort`)
#   gname   - first treatment period (`first.treat`)
#   xformla - `~1`, i.e. no covariates
# Units that are not yet treated serve as the comparison group.
# `xformla` is passed by name: the former `xformla <- ~1` was an assignment
# expression that only reached this argument through positional matching
# and left a stray `xformla` object in the calling environment.
# Note that this overwrites the `did` object holding the Mexico estimates;
# the table and figure code below refers to the result by this name.
did <- att_gt(
  yname = "count",
  tname = "year",
  idname = "region_cohort",
  gname = "first.treat",
  xformla = ~1,
  data = BR_Bolivia_92_01,
  panel = TRUE,
  control_group = "notyettreated"
)
# Table C.5
# Console summary of the estimates stored in `did`.
# Note that coefficients are the same, but confidence intervals vary
summary(did)

# LaTeX export of the tidied estimates, without the `term` column and the
# point-wise confidence bounds
table_c5 <- select(tidy(did), -c(term, point.conf.low, point.conf.high))
kableExtra::save_kable(
  kableExtra::kable(table_c5, format = "latex"),
  file = "replication files/going the last mile/table c5.tex"
)

# Figure 6
# Default plot from the did package, shown for reference only
ggdid(did)

# Custom plot of the estimates with their confidence intervals.
# half-length of the horizontal caps drawn at the interval ends
width <- 0.1

# `post` is coded 1 for periods before 1997 and 0 from 1997 onwards; the
# linetype scale below labels "1" as Pre and "0" as Post accordingly
att_estimates_bolivia <- mutate(tidy(did), post = as.numeric(time < 1997))

figure_6 <- ggplot(
  att_estimates_bolivia,
  aes(x = time, y = estimate, linetype = factor(post))
) +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  # lower cap
  geom_segment(
    aes(x = time - width, xend = time + width, y = conf.low, yend = conf.low),
    linewidth = 0.5,
    linetype = "solid"
  ) +
  # upper cap
  geom_segment(
    aes(x = time - width, xend = time + width, y = conf.high, yend = conf.high),
    linewidth = 0.5,
    linetype = "solid"
  ) +
  # interval itself; its line type distinguishes pre from post periods
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), linewidth = 0.5) +
  scale_x_continuous(breaks = 1993:2001) +
  ylab("Average Treatment Effect") +
  scale_linetype_discrete(
    breaks = c("1", "0"),
    labels = stringr::str_wrap(c("Pre", "Post"), width = 20)
  ) +
  jtools::theme_apa() +
  theme(axis.title.x = element_blank())
figure_6

# save the custom plot explicitly rather than whatever was plotted last
ggsave(
  filename = "replication files/going the last mile/figure 6.png",
  plot = figure_6
)
